package com.algo.sentiment;

/**
 * Created by supertool on 2014/10/29.
 */

import org.ansj.domain.Term;
import org.ansj.library.UserDefineLibrary;
import org.ansj.splitWord.analysis.BaseAnalysis;
import org.ansj.splitWord.analysis.IndexAnalysis;
import org.ansj.splitWord.analysis.NlpAnalysis;
import org.ansj.splitWord.analysis.ToAnalysis;
import org.javatuples.Pair;

import java.io.*;
import java.util.ArrayList;
import java.util.List;

/**
 * Convenience wrapper around the ansj segmenters ({@link BaseAnalysis}, {@link ToAnalysis},
 * {@link NlpAnalysis}, {@link IndexAnalysis}).
 *
 * <p>Typical use: call {@link #initDict(String)} once to load user-defined words from a
 * classpath resource, then call one of the {@code segmentWords*} methods.
 */
public class ChineseWordsSegment {
    /** Segmenter selector understood by {@link #segmentWordsWithFilledNature(String, Integer)}. */
    private static final int TYPE_BASE = 1;
    private static final int TYPE_TO = 2;
    private static final int TYPE_NLP = 3;
    private static final int TYPE_INDEX = 4;

    /** Nature used for dictionary entries that do not specify one. */
    private static final String DEFAULT_NATURE = "userDefine";
    /** Frequency used for dictionary entries that do not specify one. */
    private static final int DEFAULT_FREQ = 1000;
    /** Nature string that marks a term to be dropped by the "filled nature" methods. */
    private static final String BLANK_NATURE = " ";

    /**
     * Parses one user-dictionary line and registers it with {@link UserDefineLibrary}.
     *
     * <p>Accepted whitespace-separated layouts are {@code keyword}, {@code keyword nature} and
     * {@code keyword nature freq}. Blank lines, lines with more than three fields, and lines
     * whose frequency is not an integer are skipped.
     *
     * @param line raw line read from the dictionary resource
     */
    private void cutAndInsert(String line) {
        String trimmed = line.trim();
        if (trimmed.isEmpty()) {
            // Splitting "" yields a single empty field, which would register an empty keyword.
            return;
        }

        String[] fields = trimmed.split("\\s+");
        if (fields.length > 3) {
            return;
        }

        String keyword = fields[0];
        String nature = fields.length >= 2 ? fields[1] : DEFAULT_NATURE;
        int freq = DEFAULT_FREQ;
        if (fields.length == 3) {
            try {
                freq = Integer.parseInt(fields[2]);
            } catch (NumberFormatException e) {
                // One malformed entry must not abort loading of the remaining dictionary.
                System.err.println("Skipping user dictionary entry with non-numeric frequency: " + line);
                return;
            }
        }
        UserDefineLibrary.insertWord(keyword, nature, freq);
    }

    /**
     * Loads user-defined words from a classpath resource and then runs one throw-away
     * {@link ToAnalysis#parse} call (presumably to warm the segmenter up front).
     *
     * <p>A missing resource or an I/O failure is reported on standard error and does not
     * propagate; the throw-away parse call still runs in that case.
     *
     * @param userDict resource path resolved through the current thread's context class loader
     */
    public void initDict(String userDict) {
        ClassLoader classloader = Thread.currentThread().getContextClassLoader();
        InputStream is = classloader.getResourceAsStream(userDict);
        if (is == null) {
            // getResourceAsStream signals "not found" with null rather than an exception.
            System.err.println("User dictionary resource not found on classpath: " + userDict);
        } else {
            // NOTE(review): the reader uses the platform default charset; confirm the encoding
            // of the dictionary file and pass it explicitly if it is fixed.
            BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(is));
            try {
                String line;
                while ((line = bufferedReader.readLine()) != null) {
                    cutAndInsert(line);
                }
            } catch (IOException ioe) {
                ioe.printStackTrace();
            } finally {
                // Closing the reader also closes the wrapped resource stream.
                closeQuietly(bufferedReader);
            }
        }
        ToAnalysis.parse("cws");
    }

    /** Closes {@code closeable}, reporting (but not propagating) a failure to close. */
    private static void closeQuietly(Closeable closeable) {
        try {
            closeable.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Converts terms into (name, nature) pairs, preserving order.
     *
     * @param terms           terms produced by a segmenter
     * @param skipBlankNature when true, terms whose nature string equals {@code " "} are omitted
     * @return a new mutable list of pairs
     */
    private static List<Pair<String, String>> toPairs(List<Term> terms, boolean skipBlankNature) {
        List<Pair<String, String>> pairs = new ArrayList<Pair<String, String>>(terms.size());
        for (Term term : terms) {
            String nature = term.getNatureStr();
            // equals(), not ==/!=: the nature string must be compared by content.
            if (skipBlankNature && BLANK_NATURE.equals(nature)) {
                continue;
            }
            pairs.add(new Pair<String, String>(term.getName(), nature));
        }
        return pairs;
    }

    /**
     * Segments {@code line} with {@link NlpAnalysis} and returns every term with its nature.
     *
     * @param line text to segment
     * @return (name, nature) pairs in segmentation order
     */
    public List<Pair<String,String>> segmentWordsWithNature(String line) {
        return toPairs(NlpAnalysis.parse(line), false);
    }

    /**
     * Segments {@code line} with the selected segmenter and returns the terms whose nature
     * string is not {@code " "}.
     *
     * @param line text to segment
     * @param type 1 for base analysis, 2 for to analysis, 3 for nlp analysis, 4 for index analysis
     * @return (name, nature) pairs in segmentation order, or {@code null} when {@code type} is
     *         {@code null} or not one of the values above
     */
    public List<Pair<String,String>> segmentWordsWithFilledNature(String line, Integer type) {
        if (type == null) {
            // Unboxing null in the switch would throw; treat it like any other unknown type.
            return null;
        }

        List<Term> termList;
        switch (type.intValue()) {
            case TYPE_BASE:
                termList = BaseAnalysis.parse(line);
                break;
            case TYPE_TO:
                termList = ToAnalysis.parse(line);
                break;
            case TYPE_NLP:
                termList = NlpAnalysis.parse(line);
                break;
            case TYPE_INDEX:
                termList = IndexAnalysis.parse(line);
                break;
            default:
                return null;
        }
        return toPairs(termList, true);
    }

    /** Same as {@link #segmentWordsWithFilledNature(String, Integer)} using {@link BaseAnalysis}. */
    public List<Pair<String,String>> segmentWordsWithFilledNatureBase(String line) {
        return segmentWordsWithFilledNature(line, TYPE_BASE);
    }

    /** Same as {@link #segmentWordsWithFilledNature(String, Integer)} using {@link ToAnalysis}. */
    public List<Pair<String,String>> segmentWordsWithFilledNatureTo(String line) {
        return segmentWordsWithFilledNature(line, TYPE_TO);
    }

    /** Same as {@link #segmentWordsWithFilledNature(String, Integer)} using {@link NlpAnalysis}. */
    public List<Pair<String,String>> segmentWordsWithFilledNatureNlp(String line) {
        return segmentWordsWithFilledNature(line, TYPE_NLP);
    }

    /** Same as {@link #segmentWordsWithFilledNature(String, Integer)} using {@link IndexAnalysis}. */
    public List<Pair<String,String>> segmentWordsWithFilledNatureIndex(String line) {
        return segmentWordsWithFilledNature(line, TYPE_INDEX);
    }

    /**
     * Segments {@code line} with {@link NlpAnalysis} and returns only the term names.
     *
     * @param line text to segment
     * @return term names in segmentation order
     */
    public List<String> segmentWords(String line) {
        List<Term> termList = NlpAnalysis.parse(line);
        List<String> names = new ArrayList<String>(termList.size());
        for (Term term : termList) {
            names.add(term.getName());
        }
        return names;
    }

    /** Loads the bundled user dictionary {@code dict/user_defined.dic}. */
    public static void main(String[] args) {
        ChineseWordsSegment cws = new ChineseWordsSegment();
        cws.initDict("dict/user_defined.dic");
    }
}
